Let’s make some plotly plots
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(httr)
library(jsonlite)
##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:purrr':
##
## flatten
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:httr':
##
## config
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
#' Download every row behind a Socrata JSON endpoint, one page at a time.
#'
#' Pages of `chunk_size` rows are requested with `$limit`/`$offset` until a
#' page comes back with a row count different from `chunk_size`, which is
#' taken as the end of the data.
#'
#' @param url Socrata JSON resource endpoint.
#' @param chunk_size Number of rows requested per page (default 50000).
#' @return A list of tibbles, one per page, in request order. Combine them
#'   with `bind_rows()`.
get_all_inspections <- function(url, chunk_size = 50000L) {
  # Integers keep the query string free of scientific notation
  # (as.character(1e5) is "1e+05", which the API would not accept as a number).
  chunk_size <- as.integer(chunk_size)
  stopifnot(length(chunk_size) == 1L, !is.na(chunk_size), chunk_size > 0L)

  all_inspections <- list()
  page <- 1L

  repeat {
    message("Getting data, page ", page)

    response <- GET(
      url,
      query = list(
        # zipcode alone has many ties, so row order inside a zipcode could
        # differ between requests and pages could overlap or skip rows.
        # The `:id` system field breaks ties and makes paging stable.
        `$order` = "zipcode, :id",
        `$limit` = chunk_size,
        `$offset` = (page - 1L) * chunk_size
      )
    )
    # Fail loudly on 4xx/5xx instead of parsing an error payload as data.
    stop_for_status(response)

    all_inspections[[page]] <- response %>%
      content("text", encoding = "UTF-8") %>%
      fromJSON() %>%
      as_tibble()

    # A short (or empty) page means there is nothing left to fetch.
    if (nrow(all_inspections[[page]]) != chunk_size) {
      break
    }
    page <- page + 1L
  }

  all_inspections
}
# JSON resource endpoint queried for the inspection records.
url <- "https://data.cityofnewyork.us/resource/43nn-pn8j.json"

# Fetch every page, then stack the per-page tibbles into a single table.
inspection_pages <- get_all_inspections(url)
nyc_inspections <- bind_rows(inspection_pages)
## Getting data, page 1
## Getting data, page 2
## Getting data, page 3
## Getting data, page 4
## Getting data, page 5
## Getting data, page 6
# Select relevant columns, drop incomplete rows, and give columns usable types
cleaned_data <- nyc_inspections %>%
  select(
    boro, inspection_date, critical_flag,
    latitude, longitude, grade, grade_date
  ) %>%
  filter(
    !is.na(boro),
    !is.na(inspection_date),
    !is.na(latitude),
    !is.na(longitude),
    !is.na(grade)
  ) %>%
  mutate(
    # ymd() cannot parse the raw values (every row came back NA). They appear
    # to arrive as ISO timestamps such as "2023-01-01T00:00:00.000"
    # (TODO confirm), so only the leading "YYYY-MM-DD" part is parsed.
    inspection_date = ymd(str_sub(inspection_date, 1L, 10L)),
    grade_date = ymd(str_sub(grade_date, 1L, 10L)),
    # Coordinates must be numeric for a continuous map-like scatter; this is a
    # no-op if they are already numeric.
    latitude = as.numeric(latitude),
    longitude = as.numeric(longitude)
  )
# Count of grades per borough.
# count() returns an ungrouped tibble directly, so no ungroup() is needed and
# no `.groups` message is emitted.
grade_boro_count <- cleaned_data %>%
  count(boro, grade, name = "count")
# Prepare data for heatmap: one row per (borough, grade) pair with its count.
# count() returns an ungrouped tibble directly, so no ungroup() is needed and
# no `.groups` message is emitted.
heatmap_data <- cleaned_data %>%
  count(boro, grade, name = "count")
# Bar chart of grade counts per borough, with bars coloured by grade.
grade_boro_count %>%
  plot_ly(
    x = ~boro,
    y = ~count,
    color = ~grade,
    type = "bar"
  ) %>%
  layout(
    title = "Distribution of Inspection Grades by Borough",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Count")
  )
# Scatter of inspection locations, coloured by grade, with grade and borough
# in the hover text.
# Coordinates are coerced with as.numeric() so both axes are continuous even
# when the columns are text (the JSON API appears to deliver numbers as
# strings; TODO confirm). The coercion is a no-op on numeric columns.
plot_ly(
  cleaned_data,
  x = ~as.numeric(longitude),
  y = ~as.numeric(latitude),
  type = "scatter",
  mode = "markers",
  color = ~grade,
  text = ~paste("Grade:", grade, "<br>Borough:", boro)
) %>%
  layout(
    title = "Inspection Locations by Grade",
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude")
  )
# Heatmap of counts with borough on the x axis and grade on the y axis.
# `colors` sets the colour scale applied to z. A bare `color = "blue"` is
# treated as a data value to map rather than a literal colour, which is what
# triggered the RColorBrewer "minimal value for n is 3" warnings.
plot_ly(
  heatmap_data,
  x = ~boro,
  y = ~grade,
  z = ~count,
  type = "heatmap",
  colors = "Blues"
) %>%
  layout(
    title = "Heatmap of Grade Distribution by Borough",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Grade")
  )